rm(list = ls())

here::i_am(file.path("code", "03_iv_heterogeneity.R"))
library(here)
source(here("code", "config.R"))

est_df <- read_parquet(here("data", "analysis", "analysis.parquet")) |>
  mutate(
    occscore = ifelse(empstat_c1930 == 20 | empstat_c1930 == 30, 0, occscore_c1930),
    unemployed = empstat_c1930 == 20
  )

#################################################################################
# Heterogeneity by card characteristics

tsls1 <- feols(
  c(is_naacp, occscore, unemployed) ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df,
  split = ~ farmer,
  cluster = ~ serial_num
)

tsls1_std <- feols(
  c(is_naacp_z, occscore_z, unemployed_z) ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df |>
    group_by(farmer) |>
    mutate(
      is_naacp_z = scale(is_naacp)[,1],
      occscore_z = scale(occscore)[,1],
      unemployed_z = scale(unemployed)[,1]
    ),
  split = ~ farmer,
  cluster = ~ serial_num
)

tsls2 <- feols(
  c(is_naacp, occscore, unemployed) ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df,
  split = ~ laborer,
  cluster = ~ serial_num
)

tsls2_std <- feols(
  c(is_naacp_z, occscore_z, unemployed_z) ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df |>
    group_by(laborer) |>
    mutate(
      is_naacp_z = scale(is_naacp)[,1],
      occscore_z = scale(occscore)[,1],
      unemployed_z = scale(unemployed)[,1]
    ),
  split = ~ laborer,
  cluster = ~ serial_num
)

tsls3 <- feols(
  c(is_naacp, occscore, unemployed) ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df,
  split = ~ married_cards,
  cluster = ~ serial_num
)

tsls3_std <- feols(
  c(is_naacp_z, occscore_z, unemployed_z) ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df |>
    group_by(married_cards) |>
    mutate(
      is_naacp_z = scale(is_naacp)[,1],
      occscore_z = scale(occscore)[,1],
      unemployed_z = scale(unemployed)[,1]
    ),
  split = ~ married_cards,
  cluster = ~ serial_num
)

tsls4 <- feols(
  c(is_naacp, occscore, unemployed) ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df,
  split = ~ exemption,
  cluster = ~ serial_num
)

tsls4_std <- feols(
  c(is_naacp_z, occscore_z, unemployed_z) ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df |>
    group_by(exemption) |>
    mutate(
      is_naacp_z = scale(is_naacp)[,1],
      occscore_z = scale(occscore)[,1],
      unemployed_z = scale(unemployed)[,1]
    ),
  split = ~ exemption,
  cluster = ~ serial_num
)

tsls5 <- feols(
  c(is_naacp, occscore, unemployed) ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df,
  split = ~ confed_state_cards,
  cluster = ~ serial_num
)

tsls5_std <- feols(
  c(is_naacp_z, occscore_z, unemployed_z) ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df |>
    group_by(confed_state_cards) |>
    mutate(
      is_naacp_z = scale(is_naacp)[,1],
      occscore_z = scale(occscore)[,1],
      unemployed_z = scale(unemployed)[,1]
    ),
  split = ~ confed_state_cards,
  cluster = ~ serial_num
)

tsls6 <- feols(
  c(is_naacp, occscore, unemployed) ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df |>
    mutate(young = birthyr_cards > 1892),
  split = ~ young,
  cluster = ~ serial_num
)

tsls6_std <- feols(
  c(is_naacp_z, occscore_z, unemployed_z) ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df |>
    mutate(young = birthyr_cards > 1892) |>
    group_by(young) |>
    mutate(
      is_naacp_z = scale(is_naacp)[,1],
      occscore_z = scale(occscore)[,1],
      unemployed_z = scale(unemployed)[,1]
    ),
  split = ~ young,
  cluster = ~ serial_num
)

coefs <- bind_rows(
  coeftable(tsls1),
  coeftable(tsls2),
  coeftable(tsls3),
  coeftable(tsls4),
  coeftable(tsls5),
  coeftable(tsls6),
  coeftable(tsls1_std),
  coeftable(tsls2_std),
  coeftable(tsls3_std),
  coeftable(tsls4_std),
  coeftable(tsls5_std),
  coeftable(tsls6_std)
) |>
  mutate(
    sample = ifelse(sample, "Yes", "No"),
    split = factor(
      sample.var,
      levels = c("farmer", "laborer", "married_cards", "exemption", "confed_state_cards", "young"),
      labels = c("Farmer", "Laborer", "Married", "Exemption\nclaim", "Confederate\nstate", "Born\nafter 1892")
    )
  )

p_color <- coefs |>
  filter(lhs == "is_naacp") |>
  ggplot(aes(y = Estimate, ymin = Estimate - 1.96 * `Std. Error`, ymax = Estimate + 1.96 * `Std. Error`,
             x = split, color = sample, shape = sample)) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .7) +
  geom_point(position = position_dodge2(.3), size = 2) +
  geom_linerange(position = position_dodge2(.3)) +
  my_theme() +
  ylim(-.05, NA) +
  labs(y ="Effect on NAACP\nmembership (TSLS)", x = "", color = "", shape = "")
ggsave(file.path(fig_dir, "color", "iv_heterogeneity.pdf"), plot = p_color, width = 6, height = 3)

p_bw <- p_color +
  scale_color_manual(values = c("No" = "black", "Yes" = "#555"))
ggsave(file.path(fig_dir, "bw", "iv_heterogeneity.pdf"), plot = p_bw, width = 6, height = 3)

p_color <- coefs |>
  filter(lhs == "is_naacp_z") |>
  ggplot(aes(y = Estimate, ymin = Estimate - 1.96 * `Std. Error`, ymax = Estimate + 1.96 * `Std. Error`,
             x = split, color = sample, shape = sample)) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .7) +
  geom_point(position = position_dodge2(.3), size = 2) +
  geom_linerange(position = position_dodge2(.3)) +
  my_theme() +
  labs(y ="Standardized effect on\nNAACP membership (TSLS)", x = "", color = "", shape = "")
ggsave(file.path(fig_dir, "color", "iv_heterogeneity_std.pdf"), plot = p_color, width = 6, height = 3)

p_bw <- p_color +
  scale_color_manual(values = c("No" = "black", "Yes" = "#555"))
ggsave(file.path(fig_dir, "bw", "iv_heterogeneity_std.pdf"), plot = p_bw, width = 6, height = 3)

#################################################################################
# Heterogeneity by pre-war county racism

tsls <- feols(
  is_naacp ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df,
  split = ~ racism_county_cards,
  cluster = ~ serial_num
)

p <- map_dfr(tsls, broom::tidy, conf.int = T, .id = "sample") |>
  mutate(
    racism_county_cards = substr(sample, nchar(sample), nchar(sample))
  ) |>
  ggplot(aes(y = estimate, ymin = conf.low, ymax = conf.high, x = racism_county_cards)) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .7) +
  my_theme() +
  ylim(-.05, NA) +
  labs(y ="Effect on NAACP\nmembership (TSLS)", x = "County prejudice score", color = "", shape = "")

p_color <- p +
  geom_point(size = 2, color = "#F8766D") +
  geom_linerange(color = "#F8766D")
ggsave(file.path(fig_dir, "color", "iv_heterogeneity_county.pdf"), plot = p_color, width = 3.6, height = 2.8)

p_bw <- p +
  geom_point(size = 2) +
  geom_linerange()
ggsave(file.path(fig_dir, "bw", "iv_heterogeneity_county.pdf"), plot = p_bw, width = 3.6, height = 2.8)

tsls_std <- feols(
  is_naacp_z ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df |>
    group_by(racism_county_cards) |>
    mutate(is_naacp_z = scale(is_naacp)[,1]),
  split = ~ racism_county_cards,
  cluster = ~ serial_num
)

p <- map_dfr(tsls_std, broom::tidy, conf.int = T, .id = "sample") |>
  mutate(
    racism_county_cards = substr(sample, nchar(sample), nchar(sample))
  ) |>
  ggplot(aes(y = estimate, ymin = conf.low, ymax = conf.high, x = racism_county_cards)) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .7) +
  my_theme() +
  labs(y ="Standardized effect on\nNAACP membership (TSLS)", x = "County prejudice score", color = "", shape = "")

p_color <- p +
  geom_point(size = 2, color = "#F8766D") +
  geom_linerange(color = "#F8766D")
ggsave(file.path(fig_dir, "color", "iv_heterogeneity_county_std.pdf"), plot = p_color, width = 3.6, height = 2.8)

p_bw <- p +
  geom_point(size = 2) +
  geom_linerange()
ggsave(file.path(fig_dir, "bw", "iv_heterogeneity_county_std.pdf"), plot = p_bw, width = 3.6, height = 2.8)

# Effects on occup. income and unemployment by county racism score
tsls_occ <- feols(
  occscore_z ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df |>
    mutate(
      occscore = ifelse(empstat_c1930 %in% c(20, 30), 0, occscore_c1930),
      unemployed = empstat_c1930 == 20,
      across(c(occscore, unemployed), list(z = \(x) scale(x)[,1]))
    ),
  split = ~ racism_county_cards,
  cluster = ~ serial_num
)

tsls_unemp <- feols(
  unemployed_z ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df |>
    mutate(
      occscore = ifelse(empstat_c1930 %in% c(20, 30), 0, occscore_c1930),
      unemployed = empstat_c1930 == 20,
      across(c(occscore, unemployed), list(z = \(x) scale(x)[,1]))
    ),
  split = ~ racism_county_cards,
  cluster = ~ serial_num
)

p_color <- bind_rows(
  map_dfr(tsls_occ, broom::tidy, conf.int = T, .id = "sample") |> mutate(outc = "Occup. income"),
  map_dfr(tsls_unemp, broom::tidy, conf.int = T, .id = "sample") |> mutate(outc = "Unemployed")
) |>
  mutate(
    racism_county_cards = substr(sample, nchar(sample), nchar(sample))
  ) |>
  ggplot(aes(y = estimate, ymin = conf.low, ymax = conf.high, x = racism_county_cards, color = outc, shape = outc)) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .7) +
  geom_point(size = 2, position = position_dodge(.3)) +
  geom_linerange(position = position_dodge(.3)) +
  my_theme() +
  ylim(-1, 1) +
  labs(y = "Standardized effect (TSLS)", x = "County prejudice score", color = "", shape = "")
ggsave(file.path(fig_dir, "color", "iv_heterogeneity_county_income.pdf"), plot = p_color, width = 3.6, height = 2.8)

p_bw <- p_color +
  scale_color_manual(values = c("Occup. income" = "black", "Unemployed" = "#555"))
ggsave(file.path(fig_dir, "bw", "iv_heterogeneity_county_income.pdf"), plot = p_bw, width = 3.6, height = 2.8)

#################################################################################
# Heterogeneity by post-war racial violence (Red Summer)

tsls <- feols(
  is_naacp ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df,
  split = ~ redsummer_county_cards,
  cluster = ~ serial_num
)

p <- map_dfr(tsls, broom::tidy, conf.int = TRUE, .id = "sample") |>
  mutate(
    redsummer = ifelse(
      str_detect(sample, "TRUE"),
      "Yes",
      "No"
    )
  ) |>
  ggplot(aes(y = estimate, ymin = conf.low, ymax = conf.high, x = redsummer)) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .7) +
  my_theme() +
  ylim(-.05, NA) +
  labs(y ="Effect on NAACP\nmembership (TSLS)", x = "Racial violence in Red Summer", color = "", shape = "")

p_color <- p +
  geom_point(size = 2, color = "#F8766D") +
  geom_linerange(color = "#F8766D")
ggsave(file.path(fig_dir, "color", "iv_heterogeneity_redsummer.pdf"), plot = p_color, width = 3.6, height = 2.8)

p_bw <- p +
  geom_point(size = 2) +
  geom_linerange()
ggsave(file.path(fig_dir, "bw", "iv_heterogeneity_redsummer.pdf"), plot = p_bw, width = 3.6, height = 2.8)

tsls_std <- feols(
  is_naacp_z ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df |>
    group_by(redsummer_county_cards) |>
    mutate(is_naacp_z = scale(is_naacp)[,1]),
  split = ~ redsummer_county_cards,
  cluster = ~ serial_num
)

p <- map_dfr(tsls_std, broom::tidy, conf.int = T, .id = "sample") |>
  mutate(
    redsummer = ifelse(
      str_detect(sample, "TRUE"),
      "Yes",
      "No"
    )
  ) |>
  ggplot(aes(y = estimate, ymin = conf.low, ymax = conf.high, x = redsummer)) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .7) +
  my_theme() +
  labs(y ="Standardized effect on\nNAACP membership (TSLS)", x = "Racial violence in Red Summer", color = "", shape = "")

p_color <- p +
  geom_point(size = 2, color = "#F8766D") +
  geom_linerange(color = "#F8766D")
ggsave(file.path(fig_dir, "color", "iv_heterogeneity_redsummer_std.pdf"), plot = p_color, width = 3.6, height = 2.8)

p_bw <- p +
  geom_point(size = 2) +
  geom_linerange()
ggsave(file.path(fig_dir, "bw", "iv_heterogeneity_redsummer_std.pdf"), plot = p_bw, width = 3.6, height = 2.8)

# Effects on occup. income and unemployment by postwar racial violence
tsls_occ <- feols(
  occscore_z ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df |>
    mutate(
      occscore = ifelse(empstat_c1930 %in% c(20, 30), 0, occscore_c1930),
      unemployed = empstat_c1930 == 20,
      across(c(occscore, unemployed), list(z = \(x) scale(x)[,1]))
    ),
  split = ~ redsummer_county_cards,
  cluster = ~ serial_num
)

tsls_unemp <- feols(
  unemployed_z ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df |>
    mutate(
      occscore = ifelse(empstat_c1930 %in% c(20, 30), 0, occscore_c1930),
      unemployed = empstat_c1930 == 20,
      across(c(occscore, unemployed), list(z = \(x) scale(x)[,1]))
    ),
  split = ~ redsummer_county_cards,
  cluster = ~ serial_num
)

p_color <- bind_rows(
  map_dfr(tsls_occ, broom::tidy, conf.int = T, .id = "sample") |> mutate(outc = "Occup. income"),
  map_dfr(tsls_unemp, broom::tidy, conf.int = T, .id = "sample") |> mutate(outc = "Unemployed")
) |>
  mutate(
    redsummer = ifelse(
      str_detect(sample, "TRUE"),
      "Yes",
      "No"
    )
  ) |>
  ggplot(aes(y = estimate, ymin = conf.low, ymax = conf.high, x = redsummer, color = outc, shape = outc)) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .7) +
  geom_point(size = 2, position = position_dodge(.3)) +
  geom_linerange(position = position_dodge(.3)) +
  my_theme() +
  ylim(-1, 1) +
  labs(y ="Standardized effect (TSLS)", x = "Racial violence in Red Summer", color = "", shape = "")
ggsave(file.path(fig_dir, "color", "iv_heterogeneity_redsummer_income.pdf"), plot = p_color, width = 3.6, height = 2.8)

p_bw <- p_color +
  scale_color_manual(values = c("Occup. income" = "black", "Unemployed" = "#555"))
ggsave(file.path(fig_dir, "bw", "iv_heterogeneity_redsummer_income.pdf"), plot = p_bw, width = 3.6, height = 2.8)
